In [1]:
import mxnet as mx
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from mxnet import gluon
In [2]:
data_ctx = mx.cpu()
model_ctx = mx.cpu()
In [3]:
num_inputs = 784
num_outputs = 10
batch_size = 64
num_examples = 60000
In [4]:
# MNIST pre-processing: scale raw uint8 pixels into [0, 1]
# and cast both images and labels to float32
def transform(data, label):
    return data.astype(np.float32) / 255, label.astype(np.float32)
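As a quick sanity check, transform should map raw uint8 pixels in [0, 255] to floats in [0, 1]. A minimal sketch on a synthetic image (the pixel values and label here are made up, not real MNIST data):

fake_image = mx.nd.ones((28, 28, 1), dtype='uint8') * 255
image, label = transform(fake_image, np.float32(7))
print(image.max().asscalar(), image.dtype, label)  # expect 1.0, float32, 7.0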
In [5]:
train_data = gluon.data.DataLoader(gluon.data.vision.MNIST(train=True, transform=transform),
                                   batch_size,
                                   shuffle=True)
test_data = gluon.data.DataLoader(gluon.data.vision.MNIST(train=False, transform=transform),
                                  batch_size,
                                  shuffle=False)
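It can help to peek at one batch before building the model; each image arrives as 28x28x1 and gets flattened to 784 later. A quick inspection sketch:

for data, label in train_data:
    print(data.shape, label.shape)  # expect (64, 28, 28, 1) and (64,)
    break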
In [6]:
num_hidden = 256
weight_scale = .01
In [7]:
#######################
# Allocate parameters for the first hidden layer
#######################
W1 = mx.nd.random_normal(shape=(num_inputs, num_hidden),
                         scale=weight_scale,
                         ctx=model_ctx)
b1 = mx.nd.random_normal(shape=num_hidden,
                         scale=weight_scale,
                         ctx=model_ctx)
#######################
# Allocate parameters for the second hidden layer
#######################
W2 = mx.nd.random_normal(shape=(num_hidden, num_hidden),
                         scale=weight_scale,
                         ctx=model_ctx)
b2 = mx.nd.random_normal(shape=num_hidden,
                         scale=weight_scale,
                         ctx=model_ctx)
#######################
# Allocate parameters for the output layer
#######################
W3 = mx.nd.random_normal(shape=(num_hidden, num_outputs),
                         scale=weight_scale,
                         ctx=model_ctx)
b3 = mx.nd.random_normal(shape=num_outputs,
                         scale=weight_scale,
                         ctx=model_ctx)
params = [W1, b1, W2, b2, W3, b3]
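The shapes trace the architecture: W1 maps the 784 inputs to 256 hidden units, W2 maps 256 to 256, and W3 maps 256 to the 10 class scores. Printing them confirms the wiring:

for param in params:
    print(param.shape)
# expect (784, 256), (256,), (256, 256), (256,), (256, 10), (10,)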
In [8]:
## Attach gradients so autograd allocates a gradient buffer for each parameter
for param in params:
    param.attach_grad()
In [9]:
# ReLU activation: elementwise max(x, 0)
def relu(X):
    return mx.nd.maximum(X, mx.nd.zeros_like(X))
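ReLU simply clips negative values to zero, elementwise:

print(relu(mx.nd.array([-2., 0., 3.])))  # [0. 0. 3.]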
In [10]:
# Softmax: shift by the max score for numerical stability,
# exponentiate, then normalize each row to sum to 1
def softmax(y_linear):
    exp = mx.nd.exp(y_linear - mx.nd.max(y_linear))
    partition = mx.nd.nansum(data=exp,
                             axis=0,
                             exclude=True).reshape((-1, 1))
    return exp / partition
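Each row of the softmax output is a probability distribution, so the rows should sum to 1. A small check with made-up scores:

probs = softmax(mx.nd.array([[1., 2., 3.], [1., 1., 1.]]))
print(probs.sum(axis=1))  # each entry should be 1.0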
In [11]:
def cross_entropy(yhat, y):
    return - mx.nd.nansum(data=(y * mx.nd.log(yhat)),
                          axis=0,
                          exclude=True)
In [12]:
def softmax_cross_entropy(yhat_linear, y):
    return - mx.nd.nansum(y * mx.nd.log_softmax(yhat_linear),
                          axis=0,
                          exclude=True)
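The two loss definitions should agree: cross_entropy applied to softmax outputs matches softmax_cross_entropy applied to the raw logits, but the latter works in log space via log_softmax and is numerically safer for large logits. A small check with made-up logits:

logits = mx.nd.array([[2.0, 0.5, -1.0]])
target = mx.nd.one_hot(mx.nd.array([0.]), 3)
print(cross_entropy(softmax(logits), target))   # ~0.24
print(softmax_cross_entropy(logits, target))    # should match the line above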
In [13]:
def net(X):
    #######################
    # Compute the first hidden layer
    #######################
    h1_linear = mx.nd.dot(X, W1) + b1
    h1 = relu(h1_linear)
    #######################
    # Compute the second hidden layer
    #######################
    h2_linear = mx.nd.dot(h1, W2) + b2
    h2 = relu(h2_linear)
    #######################
    # Compute the output layer.
    # We will omit the softmax function here
    # because it will be applied
    # in the softmax_cross_entropy loss
    #######################
    yhat_linear = mx.nd.dot(h2, W3) + b3
    return yhat_linear
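Before training, it is worth confirming that a forward pass produces one score per class. A dummy-batch sketch:

print(net(mx.nd.ones((2, num_inputs), ctx=model_ctx)).shape)  # expect (2, 10)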
In [14]:
def SGD(params, lr):
    for param in params:
        param[:] = param - lr * param.grad
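The slice assignment param[:] = ... updates each parameter in place, so the gradient buffer attached by attach_grad keeps pointing at the same array. A toy example of a single step (the numbers are illustrative):

w = mx.nd.array([1., 2.])
w.attach_grad()
with mx.autograd.record():
    loss = (w * w).sum()
loss.backward()     # gradient of sum(w^2) is 2*w = [2. 4.]
SGD([w], lr=0.1)
print(w)            # expect [0.8 1.6]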
In [15]:
def evaluate_accuracy(data_iterator, net):
    numerator = 0.
    denominator = 0.
    for i, (data, label) in enumerate(data_iterator):
        data = data.as_in_context(model_ctx).reshape((-1, 784))
        label = label.as_in_context(model_ctx)
        output = net(data)
        predictions = mx.nd.argmax(output, axis=1)
        numerator += mx.nd.sum(predictions == label)
        denominator += data.shape[0]
    return (numerator / denominator).asscalar()
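Run on the still-untrained network, accuracy should be near chance, roughly 0.1 for ten balanced classes:

print(evaluate_accuracy(test_data, net))  # expect roughly 0.1 before training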
In [16]:
epochs = 10
learning_rate = .001
smoothing_constant = .01  # defined but not used in the training loop below
In [17]:
for e in tqdm(range(epochs)):
    cumulative_loss = 0
    for i, (data, label) in enumerate(train_data):
        data = data.as_in_context(model_ctx).reshape((-1, 784))
        label = label.as_in_context(model_ctx)
        label_one_hot = mx.nd.one_hot(label, 10)
        with mx.autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label_one_hot)
        loss.backward()
        SGD(params, learning_rate)
        cumulative_loss += mx.nd.sum(loss).asscalar()
    test_accuracy = evaluate_accuracy(test_data, net)
    train_accuracy = evaluate_accuracy(train_data, net)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" %
          (e, cumulative_loss / num_examples, train_accuracy, test_accuracy))
In [18]:
# Prediction helper: pick the class with the highest output score
def model_predict(net, data):
    output = net(data)
    return mx.nd.argmax(output, axis=1)
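For example, a batch of three flattened images yields three digit predictions (the all-ones input is just a placeholder):

print(model_predict(net, mx.nd.ones((3, 784), ctx=model_ctx)))  # three labels in [0, 9]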
In [19]:
samples = 10
In [20]:
# Let's sample 10 random data points from the test set
sample_data = gluon.data.DataLoader(dataset=gluon.data.vision.MNIST(train=False, transform=transform),
                                    batch_size=samples, shuffle=True)
for i, (data, label) in enumerate(sample_data):
    data = data.as_in_context(model_ctx)
    im = mx.nd.transpose(data, (1, 0, 2, 3))
    im = mx.nd.reshape(im, (28, 10 * 28, 1))
    imtiles = mx.nd.tile(im, (1, 1, 3))
    plt.imshow(imtiles.asnumpy())
    plt.show()
    pred = model_predict(net, data.reshape((-1, 784)))
    print('model predictions are:', pred)
    print('true labels :', label)
    break